from langchain_community.document_loaders import UnstructuredExcelLoader,UnstructuredPowerPointLoader,UnstructuredWordDocumentLoader
# Markdown alternative (UnstructuredMarkdownLoader must be added to the import above first):
# loader = UnstructuredMarkdownLoader("./project_api.md")
# Demo script: load an Excel, a PowerPoint and a Word file with the
# Unstructured-based LangChain loaders and dump what each one returns.
#
# Every loader is created with mode="elements" (see the LangChain
# Unstructured loader docs for the exact splitting semantics of this mode).
loader = UnstructuredExcelLoader("./test.xlsx", mode="elements")
ppt_loader = UnstructuredPowerPointLoader("./test.pptx", mode="elements")
word_loader = UnstructuredWordDocumentLoader("./test.docx", mode="elements")

# Load all three files before printing anything, so a failure while reading
# any of them surfaces before partial output is produced.
docs = loader.load()
docs_ppt = ppt_loader.load()
docs_word = word_loader.load()

# For each result list (Excel, PowerPoint, Word — in that order) print the
# full list, its length, and the metadata of the first entry.
for _loaded in (docs, docs_ppt, docs_word):
    print(_loaded)
    print(len(_loaded))
    # A loader may return an empty list; indexing [0] would then raise
    # IndexError, so only show the first entry's metadata when one exists.
    if _loaded:
        print(_loaded[0].metadata)